In [1]:
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Add ./utils (relative to the kernel's working directory) to the import path;
# presumably this is where loader, round_classifier, thresholds and Parser
# live -- confirm against the repository layout.
sys.path.append("./utils")
# Route pandas' `.plot` accessor through plotly instead of the default backend.
pd.options.plotting.backend = "plotly"
In [2]:
import loader
import round_classifier
import thresholds
from Parser import Parser
In [3]:
# Load the dataset via the `loader` module; every later cell derives from it.
all_data = loader.load_dataset()
In [4]:
# Keep the column index around: its first 16 entries are displayed here and are
# reused further down (as `normal_columns`) when the final columns are selected.
all_columns = all_data.columns
all_columns[0:16]
Out[4]:
Index(['time_left', 'ct_score', 't_score', 'map', 'bomb_planted', 'ct_health',
       't_health', 'ct_armor', 't_armor', 'ct_money', 't_money', 'ct_helmets',
       't_helmets', 'ct_defuse_kits', 'ct_players_alive', 't_players_alive'],
      dtype='object')
In [5]:
def _assemble_df(_df, _columns, _prefixes):
    """Build a frame from ``_columns`` of ``_df`` with ``_prefixes`` stripped.

    Each prefix is removed from the column name in the given order using
    ``str.replace``. If two source columns map to the same stripped name,
    the later one overwrites the earlier one.
    """
    stripped = {}
    for column in _columns:
        key = column
        for prefix in _prefixes:
            key = key.replace(prefix, "")
        stripped[key] = _df[column]
    return pd.DataFrame(stripped)


def assemble_df_weapons(_df, _weapons):
    """Return the ``_weapons`` columns of ``_df`` keyed by bare weapon name.

    Args:
        _df: DataFrame holding the weapon columns.
        _weapons: iterable of column names such as ``"ct_weapon_ak47"`` or
            ``"t_weapon_ak47"``.

    Returns:
        A new DataFrame whose columns are the names with the
        ``"ct_weapon_"`` / ``"t_weapon_"`` prefix removed (e.g. ``"ak47"``).

    Raises:
        KeyError: if a requested column is missing from ``_df``.
    """
    return _assemble_df(_df, _weapons, ("ct_weapon_", "t_weapon_"))


def assemble_df_grenades(_df, _grenades):
    """Return the ``_grenades`` columns of ``_df`` keyed by bare grenade name.

    Args:
        _df: DataFrame holding the grenade columns.
        _grenades: iterable of column names such as
            ``"ct_grenade_flashbang"`` or ``"t_grenade_flashbang"``.

    Returns:
        A new DataFrame whose columns are the names with the
        ``"ct_grenade_"`` / ``"t_grenade_"`` prefix removed.

    Raises:
        KeyError: if a requested column is missing from ``_df``.
    """
    return _assemble_df(_df, _grenades, ("ct_grenade_", "t_grenade_"))
In [6]:
# Weapons Percentage
# Base weapon names. Every name contributes two column names to `weapons`,
# CT first and then T (e.g. "ct_weapon_ak47", "t_weapon_ak47").
weapon_names = [
    "ak47",
    "aug",
    "awp",
    "bizon",
    "cz75auto",
    "elite",
    "famas",
    "g3sg1",
    "galilar",
    "glock",
    "m249",
    "m4a1s",
    "m4a4",
    "mac10",
    "mag7",
    "mp5sd",
    "mp7",
    "mp9",
    "negev",
    "nova",
    "p90",
    "r8revolver",
    "sawedoff",
    "scar20",
    "sg553",
    "ssg08",
    "ump45",
    "xm1014",
    "deagle",
    "fiveseven",
    "usps",
    "p250",
    "p2000",
    "tec9",
]
weapons = [
    side + "_weapon_" + name for name in weapon_names for side in ("ct", "t")
]

# Split the columns by side. No CT column starts with "t", so the two prefix
# checks cannot overlap.
t_weapons = [column for column in weapons if column.startswith("t")]
ct_weapons = [column for column in weapons if column.startswith("ct")]

# One horizontal bar chart per side: per-weapon column totals, smallest first.
for chart_title, side_columns in (
    ("T Weapons", t_weapons),
    ("CT Weapons", ct_weapons),
):
    weapon_totals = assemble_df_weapons(all_data, side_columns).sum()
    display(weapon_totals.sort_values(ascending=True).plot.barh(title=chart_title))
In [7]:
# Grenade columns, CT first and then T for each grenade type.
grenades = [
    "ct_grenade_hegrenade",
    "t_grenade_hegrenade",
    "ct_grenade_flashbang",
    "t_grenade_flashbang",
    "ct_grenade_smokegrenade",
    "t_grenade_smokegrenade",
    "ct_grenade_incendiarygrenade",
    "t_grenade_incendiarygrenade",
    "ct_grenade_molotovgrenade",
    "t_grenade_molotovgrenade",
    "ct_grenade_decoygrenade",
    "t_grenade_decoygrenade",
]

# Use the grenade helper here: `assemble_df_weapons` only strips the
# "*_weapon_" prefixes, so it would leave the "ct_grenade_" / "t_grenade_"
# prefixes in the bar labels.
t_grenades = list(filter(lambda el: el[0:1] == "t", grenades))
display(
    assemble_df_grenades(all_data, t_grenades)
    .sum()
    .sort_values(ascending=True)
    .plot.barh(title="T Grenades")
)
ct_grenades = list(filter(lambda el: el[0:2] == "ct", grenades))
display(
    assemble_df_grenades(all_data, ct_grenades)
    .sum()
    .sort_values(ascending=True)
    .plot.barh(title="CT Grenades")
)
In [8]:
# Run the Parser steps in order; each step receives a fresh copy of the
# previous step's result, starting from a copy of `all_data`.
parser = Parser()
treat_data = all_data
for parser_step in (
    parser.create_round_column,
    parser.classify_weapons,
    parser.create_round_winner_columns,
):
    treat_data = parser_step(treat_data.copy())
In [9]:
# Row-wise round classification. The columns are created in this exact order
# (pistol round, eco rounds, force-buy rounds), so a classifier can see the
# columns created before it.
round_flag_classifiers = [
    ("pistol_round", round_classifier.define_pistol_round),
    # Defining eco rounds
    ("ct_eco", round_classifier.define_full_ct_eco),
    ("t_eco", round_classifier.define_full_t_eco),
    # Defining force buy rounds
    ("ct_force", round_classifier.define_force_ct_round),
    ("t_force", round_classifier.define_force_t_round),
]
for flag_column, classify_row in round_flag_classifiers:
    treat_data[flag_column] = treat_data.apply(classify_row, axis=1)
In [10]:
# I don't think these are the right columns to analyse; this is only an example
# of what we can do.

n_bins = 7
health_column = "ct_health"
lower, higher = thresholds.prepare(treat_data.copy(), health_column)

# Both builders take the same arguments; each one is handed a fresh copy of the
# frame as it stands at that point.
threshold_builders = (
    ("ct_health_thresholds", thresholds.build),
    ("ct_health_thresholds_num", thresholds.build_numerical),
)
for target_column, build_fn in threshold_builders:
    treat_data[target_column] = build_fn(
        treat_data.copy(), health_column, lower, higher, n_bins
    )
In [11]:
# I don't think these are the right columns to analyse; this is only an example
# of what we can do.

n_bins = 7
health_column = "t_health"
lower, higher = thresholds.prepare(treat_data.copy(), health_column)

# Both builders take the same arguments; each one is handed a fresh copy of the
# frame as it stands at that point.
threshold_builders = (
    ("t_health_thresholds", thresholds.build),
    ("t_health_thresholds_num", thresholds.build_numerical),
)
for target_column, build_fn in threshold_builders:
    treat_data[target_column] = build_fn(
        treat_data.copy(), health_column, lower, higher, n_bins
    )
In [12]:
# Selecting columns
# The first 16 dataset columns are kept as they are.
normal_columns = all_columns[:16].tolist()

# Per-side engineered columns: the same suffixes exist for both T and CT.
side_column_suffixes = [
    "main_rifle",
    "sec_rifle",
    "force_weapons",
    "weak_pistols",
    "strong_pistols",
    "granades",
    "weapon_awp",
    "eco",
    "force",
]
made_columns_t = ["t_" + suffix for suffix in side_column_suffixes]
made_columns_ct = ["ct_" + suffix for suffix in side_column_suffixes]

# Columns that are not tied to a single side's loadout.
extra_columns = [
    "pistol_round",
    "round",
    "round_winner",
    "round_winner_t",
    "round_winner_ct",
    "t_health_thresholds",
    "t_health_thresholds_num",
    "ct_health_thresholds",
    "ct_health_thresholds_num",
]

# Final order: raw columns, CT columns, T columns, extras.
selected_columns = []
for column_group in (normal_columns, made_columns_ct, made_columns_t, extra_columns):
    selected_columns = selected_columns + column_group

# Keep only the selected columns
final_df = treat_data[selected_columns]
In [13]:
# Summarise the selected columns: dtypes, non-null counts and memory usage.
final_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122410 entries, 0 to 122409
Data columns (total 43 columns):
 #   Column                    Non-Null Count   Dtype   
---  ------                    --------------   -----   
 0   time_left                 122410 non-null  float64 
 1   ct_score                  122410 non-null  float64 
 2   t_score                   122410 non-null  float64 
 3   map                       122410 non-null  object  
 4   bomb_planted              122410 non-null  bool    
 5   ct_health                 122410 non-null  float64 
 6   t_health                  122410 non-null  float64 
 7   ct_armor                  122410 non-null  float64 
 8   t_armor                   122410 non-null  float64 
 9   ct_money                  122410 non-null  float64 
 10  t_money                   122410 non-null  float64 
 11  ct_helmets                122410 non-null  float64 
 12  t_helmets                 122410 non-null  float64 
 13  ct_defuse_kits            122410 non-null  float64 
 14  ct_players_alive          122410 non-null  float64 
 15  t_players_alive           122410 non-null  float64 
 16  ct_main_rifle             122410 non-null  float64 
 17  ct_sec_rifle              122410 non-null  float64 
 18  ct_force_weapons          122410 non-null  float64 
 19  ct_weak_pistols           122410 non-null  float64 
 20  ct_strong_pistols         122410 non-null  float64 
 21  ct_granades               122410 non-null  float64 
 22  ct_weapon_awp             122410 non-null  float64 
 23  ct_eco                    122410 non-null  int64   
 24  ct_force                  122410 non-null  int64   
 25  t_main_rifle              122410 non-null  float64 
 26  t_sec_rifle               122410 non-null  float64 
 27  t_force_weapons           122410 non-null  float64 
 28  t_weak_pistols            122410 non-null  float64 
 29  t_strong_pistols          122410 non-null  float64 
 30  t_granades                122410 non-null  float64 
 31  t_weapon_awp              122410 non-null  float64 
 32  t_eco                     122410 non-null  int64   
 33  t_force                   122410 non-null  int64   
 34  pistol_round              122410 non-null  int64   
 35  round                     122410 non-null  float64 
 36  round_winner              122410 non-null  object  
 37  round_winner_t            122410 non-null  int64   
 38  round_winner_ct           122410 non-null  int64   
 39  t_health_thresholds       122410 non-null  category
 40  t_health_thresholds_num   122410 non-null  category
 41  ct_health_thresholds      122410 non-null  category
 42  ct_health_thresholds_num  122410 non-null  category
dtypes: bool(1), category(4), float64(29), int64(7), object(2)
memory usage: 36.1+ MB
In [14]:
# Checking for NaNs: count of missing values per column
final_df.isna().sum()
Out[14]:
time_left                   0
ct_score                    0
t_score                     0
map                         0
bomb_planted                0
ct_health                   0
t_health                    0
ct_armor                    0
t_armor                     0
ct_money                    0
t_money                     0
ct_helmets                  0
t_helmets                   0
ct_defuse_kits              0
ct_players_alive            0
t_players_alive             0
ct_main_rifle               0
ct_sec_rifle                0
ct_force_weapons            0
ct_weak_pistols             0
ct_strong_pistols           0
ct_granades                 0
ct_weapon_awp               0
ct_eco                      0
ct_force                    0
t_main_rifle                0
t_sec_rifle                 0
t_force_weapons             0
t_weak_pistols              0
t_strong_pistols            0
t_granades                  0
t_weapon_awp                0
t_eco                       0
t_force                     0
pistol_round                0
round                       0
round_winner                0
round_winner_t              0
round_winner_ct             0
t_health_thresholds         0
t_health_thresholds_num     0
ct_health_thresholds        0
ct_health_thresholds_num    0
dtype: int64
In [15]:
# Visualising the correlations
# `final_df` also holds text columns ("map", "round_winner") and categorical
# threshold columns, which cannot be correlated. Older pandas dropped them
# silently inside `DataFrame.corr()`; pandas >= 2.0 raises instead. Selecting
# the numeric and boolean columns explicitly gives the same matrix on both.
corr_fig, corr_ax = plt.subplots(figsize=(24, 18))
corr_matrix = final_df.select_dtypes(include=["number", "bool"]).corr()
# Draw on the axes created above rather than relying on pyplot's current axes.
sns.heatmap(corr_matrix, annot=True, ax=corr_ax)
plt.show()
In [16]:
# Share of rows won by each side, as fractions of the non-null winner count.
winner_column = final_df["round_winner"]
total = winner_column.count()
CT = winner_column.value_counts().CT / total
T = len(final_df[winner_column == "T"]) / total

# Slice sizes and their labels, in matching order.
labels = ["T", "CT"]
sizes = [T, CT]

fig1, ax1 = plt.subplots()
ax1.set_title("Round Winners", color="white")
ax1.pie(
    sizes,
    labels=labels,
    autopct="%1.1f%%",
    shadow=True,
    startangle=90,
)
# Equal aspect ratio keeps the pie circular.
ax1.axis("equal")

plt.show()
In [17]:
# Wins per map for each side. Only the two win-indicator columns are summed:
# aggregating the whole frame would also try to sum the text and categorical
# columns, which is wasted work on older pandas and an error on pandas >= 2.0.
# NOTE(review): "de_cache" is excluded; the reason is not visible here
# (presumably too few samples) -- confirm.
win_columns = ["round_winner_ct", "round_winner_t"]
score_per_map = (
    final_df.loc[final_df["map"] != "de_cache", ["map"] + win_columns]
    .groupby(by="map")
    .sum()
)

# Total wins per map, used as the denominator for both percentages.
wins_per_map = score_per_map["round_winner_ct"] + score_per_map["round_winner_t"]
score_per_map["Contra Terrorista"] = (
    100 * score_per_map["round_winner_ct"] / wins_per_map
)
score_per_map["Terrorista"] = 100 * score_per_map["round_winner_t"] / wins_per_map

# Positive values mean the map favours the T side, negative the CT side.
score_per_map["win_diff"] = (
    score_per_map["Terrorista"] - score_per_map["Contra Terrorista"]
)
score_per_map
Out[17]:
round_winner_ct round_winner_t Contra Terrorista Terrorista win_diff
map
de_dust2 10158 11986 45.872471 54.127529 8.255058
de_inferno 10810 13001 45.399185 54.600815 9.201629
de_mirage 9144 9432 49.224806 50.775194 1.550388
de_nuke 10214 8811 53.687254 46.312746 -7.374507
de_overpass 7026 7055 49.897024 50.102976 0.205951
de_train 7310 6181 54.184271 45.815729 -8.368542
de_vertigo 5239 5898 47.041394 52.958606 5.917213
In [18]:
import plotly.express as px

# Grouped bar chart of the win percentage per map, one bar per side.
# (The unused `df = px.data.tips()` left over from the plotly example was
# removed; nothing in the notebook reads it.)
fig = px.bar(
    score_per_map,
    x=score_per_map.index,
    labels={
        "value": "Rounds ganhos(%)",
        "map": "Mapa",
        "variable": "Lado",
    },
    y=["Contra Terrorista", "Terrorista"],
    barmode="group",
)
fig.show()
In [19]:
# Bucket `time_left` into 75 equal-width bins and label every row with the
# integer-cast left edge of its bin; all other columns are copied unchanged.
time_slots = final_df.copy()
time_left_bins = pd.cut(time_slots["time_left"], 75)
time_slots["time_left"] = time_left_bins
bin_left_edges = time_slots["time_left"].apply(lambda interval: interval.left)
time_slots["time_left"] = bin_left_edges.astype(int)
In [20]:
# Number of rows with the bomb planted, per time bucket. Only `bomb_planted`
# is aggregated: summing the whole frame would also try to sum the text and
# categorical columns, which fails on pandas >= 2.0.
defuse_per_time = time_slots.groupby(by="time_left")[["bomb_planted"]].sum()
# Keep only the buckets below 120 on the `time_left` axis.
defuse_per_time = defuse_per_time[defuse_per_time.index < 120]
# (The unused `df = px.data.tips()` left over from the plotly example was
# removed; nothing in the notebook reads it.)
fig = px.bar(
    defuse_per_time,
    x=defuse_per_time.index,
    labels={"bomb_planted": "Bombas Plantadas", "time_left": "Tempo Restante"},
    y="bomb_planted",
)
fig.show()
In [21]:
# Mean team health per time bucket for both sides.
# NOTE: despite its name, `money_per_time` holds health means; the name is kept
# so that any other code referring to it keeps working.
# Only the two plotted columns are averaged: taking the mean of the whole
# frame fails on the text columns on pandas >= 2.0.
money_per_time = time_slots.groupby(by="time_left")[["ct_health", "t_health"]].mean()
# Keep only the buckets below 120 on the `time_left` axis.
money_per_time = money_per_time[money_per_time.index < 120]
# Convert the bucket index to a categorical one before plotting.
money_per_time.index = money_per_time.index.astype("category")
# (The unused `df = px.data.tips()` left over from the plotly example was
# removed; nothing in the notebook reads it.)
fig = px.bar(
    money_per_time,
    x=money_per_time.index,
    labels={
        "value": "Vida do time",
        "time_left": "Tempo Restante",
        "variable": "Lado",
    },
    y=["ct_health", "t_health"],
    barmode="group",
)
fig.show()
In [ ]: